* Run unattended: do not stop at --more-- prompts and ignore any -pause- commands
set more off
pause off

/**********************************************************************
* Cheaper Faster and More than Good Enough: 
Is GPS the New Gold Standard in Land Area Measurement -- SRM

** File prepares data from 2013 Nigeria General Household Survey Panel - Farm Area Measurement Validation Study for analysis.
** Data files available at: http://microdata.worldbank.org/index.php/catalog/2842/study-description
** Farm Area Validation data matches with households & plots from the Nigeria GHS-Panel Wave 2 study (data here: http://microdata.worldbank.org/index.php/catalog/1952)						

Note: All variables that could potentially be used to identify survey 
respondents have been withheld from public data release. Syntax that 
references these variables may also have been hidden in order to protect 
respondent confidentiality.							
***********************************************************************/


* Input locations. ndata and panel_data are left empty here and must be set by
* the user before running; shapemetrics is built from ndata.
global ndata 		/*Data from Farm Area Measurement Validation Study*/
global panel_data	/*Data from GHS-Panel Wave 2*/
global shapemetrics	"$ndata/shapemetrics_nigeria_public.dta"

*Define data files
********************************************************************************
* Load each section of the validation questionnaire, apply the household-ID and
* LGA corrections that are common to all sections, and store the result in a
* tempfile named after the section (`secta', `sectb', `sectc', `sectc1', `sectc2').
*
*	secta  - Plot level - administrative units, date & time of measurement (Q1-13)
*	sectb  - Plot level - crops grown (Q14)
*	sectc  - Plot level - SR area (Q15)
*	sectc1 - Plot-side level - compass & rope bearings and distances (Q16-18)
*	sectc2 - Plot level - Compass & rope area, GPS area, weather, trees (Q19-25)
*
* Paths use forward slashes: Stata accepts them on every platform, whereas
* backslashes are only a directory separator on Windows.
foreach sect in secta sectb sectc sectc1 sectc2 {
	use "$ndata/`sect'.dta", clear

	* Household IDs corrected identically in every section
	replace hhid=70129  if hhid==70139 & ea==1640
	replace hhid=290066 if hhid==29006
	replace hhid=220017 if hhid==2217
	* LGA code for the re-numbered household 290066
	replace lga=2905    if hhid==290066

	* The local created by -tempfile- outlives the loop, so `secta' etc. can be
	* used by the code that follows.
	tempfile `sect'
	save "``sect''"
}
********************************************************************************

*Merge all plot-level sections (excludes c1 which is plot bearings data)
* Sections A, B and C are combined on household and plot number keeping every
* record from either side; the merge indicator is not retained.
use `secta', clear
merge 1:1 hhid q13 using `sectb', nogenerate
merge 1:1 hhid q13 using `sectc', nogenerate
* For section C2, records found only in the data assembled so far are discarded
merge 1:1 hhid q13 using `sectc2', keep(match using) nogenerate

* Working names for the plot number and the self-reported area unit
rename q13  plotid
rename q15b SRunit

*Clean Up
***********************************************************************************
/*Data Entry Errors - recalculated some CR areas after observing particularly large 
differences between GPS & CR */
	* q20 is the compass-and-rope area and q19 its perimeter (renamed CRarea and
	* cr_perimeter further down)
	replace q20=11166.2 if hhid==220114 & plotid==3
	replace q20=666.1 	if hhid==70044 	& plotid==1
	replace q20=3787.6	if hhid==70012 	& plotid==2
	replace q20=2977.9	if hhid==70027	& plotid==1
	replace q20=15356.9	if hhid==220004	& plotid==1	
	replace q19=171.4	if hhid==70149	& plotid==2
	replace q20=9616.7 if hhid==290134 & plotid==1
	replace q19=705.06 if hhid==300211 & plotid==3
	
	//Clean Up
	* Self-reported quantity keyed as 10060 instead of 1060
	replace q15a=1060 if q15a==10060 & hhid==70118 & plotid==3
	* Unit recode for two households: switch unit code 1 to code 6.
	* The parentheses are required: & binds tighter than |, so without them every
	* plot of hhid 290165 was recoded to unit 6 whatever unit had been reported.
	replace SRunit=6 if SRunit==1 & (hhid==290104 | hhid==290165)
		// other HH plots reported in acres, assuming unit error (bias is high)

*Convert SR areas
*********************************************************************************
*Need to first bring in zone codes from GHS data
* States 7 and 22 are assigned zone 1, states 29 and 30 zone 6; zone is missing
* for any other state.
gen zone = cond(inlist(state, 7, 22), 1, cond(inlist(state, 29, 30), 6, .))

*First convert SR to hectares using conversion factors in GHS Basic Information Document
	gen SRhect=.

	* Unit codes with a single factor for every zone
	replace SRhect = q15a*0.0001   if SRunit==1
	replace SRhect = q15a*0.0667   if SRunit==5
	replace SRhect = q15a*0.404686 if SRunit==6

	* Unit code 2 uses the same factor in both zones
	replace SRhect = q15a*0.00012 if SRunit==2 & inlist(zone, 1, 6)

	* Unit codes 3 and 4 have zone-specific factors
	replace SRhect = q15a*0.0027  if zone==1 & SRunit==3
	replace SRhect = q15a*0.00006 if zone==1 & SRunit==4
	replace SRhect = q15a*0.00001 if zone==6 & SRunit==3
	replace SRhect = q15a*0.00041 if zone==6 & SRunit==4

*Convert all to acres for ease of comparison
	* Hectares to acres; for unit code 6 the reported quantity is used directly
	gen SRacres = SRhect*2.471053814672
	replace SRacres = q15a if SRunit==6
	* Square metres, derived from hectares
	gen SRmeters = SRhect*10000
	* GPS (q22b) and compass-and-rope (q20) areas scaled by acres per square metre
	gen GPS_acres = q22b*0.0002471053814672
	gen CR_acres  = q20*0.0002471053814672
***********************************************************************************


*********************
** Rename to match **
*********************

	* Self-reported area
	rename q15a    SR_quant
	rename SRunit  SR_unit
	rename SRacres SR_acres
	* Compass-and-rope measurement
	rename q19 cr_perimeter
	rename q20 CRarea
	rename q21 closingerr
	* GPS measurement
	rename q22a gps_perimeter
	rename q22b GPSarea
	rename q23  gps_accuracy
	* Conditions at measurement
	rename q24 weather
	rename q25 treecover

	* Survey identifier; this file is always code 3
	label define SURVEY 1 "MLASS Ethiopia" 2 "Zanzibar Experiment" 3 "Nigeria Experiment" 4 "LSMS-ISA Malawi 2010/11"
	gen survey=3
	label values survey SURVEY

	gen gps_model="Garmin GPSMap62"

	//Break out interview month and year
	* q9b is a string date. It is first split on "-", taking the 2nd and 3rd
	* pieces as month and year ...
	split q9b, gen(date) parse(-)
	replace date1="" if date2==""
	gen int_month=date2
	gen int_year=date3
	* ... then split on "/" to fill whatever the first pass left empty.
	split q9b, gen(date2) parse(/)
	replace date21="" if date22==""
	replace int_month=date22 if int_month==""
	replace int_year=date23 if int_year==""
	* Remove all working pieces, then convert month and year to numeric
	drop date*
	destring int_month int_year, replace
	label variable int_month "month of interview"
	label variable int_year "year of interview"

* Plot-level areas file used by the merge further down
tempfile areas
save `areas'

***********************
** Number of Corners **
***********************

* Builds one row per plot holding the number of distinct side records (q16a)
* in the plot-side bearings file.
use "$ndata/sectc1.dta", clear

* Apply all three household-ID corrections used for the plot-level sections.
* Previously only the first was applied here, so the re-numbered households
* 290066 and 220017 could not match the areas file on hhid/plotid and ended up
* with a missing num_corners.
replace hhid=70129  if hhid==70139 & ea==1640
replace hhid=290066 if hhid==29006
replace hhid=220017 if hhid==2217

	ren q13 plotid
	*isid hhid plotid q16a
	* Keep one record per household-plot-side
	duplicates drop hhid plotid q16a, force // 17 obs
	
	* Rows remaining per plot = number of sides/corners
	bysort hhid plotid: egen num_corners=count(hhid)
	collapse (max) num_corners, by(hhid plotid)
	label var num_corners "number of corners in CR measurement"

tempfile sides
save `sides'

***********
** Merge **
***********

* Attach the corner count to the plot-level areas; plots that exist only in the
* corners file are not kept, and the merge indicator is not retained.
use `areas', clear
merge 1:1 hhid plotid using `sides', keep(master match) nogenerate

* Household-ID correction restricted to ric 1506 (changes nothing if hhid 2217
* has already been recoded in the section files)
replace hhid=220017 if hhid==2217 & ric==1506

* Variables carried forward into the validation file
keep hhid plotid state lga sector ea ric zone ///
	SR_quant SR_unit SR_acres ///
	cr_perimeter CRarea closingerr CR_acres num_corners ///
	gps_perimeter GPSarea gps_accuracy GPS_acres gps_model ///
	weather treecover survey int_month int_year

tempfile validation
save `validation'


************************************************
** HH & Plot Characteristics from wave 2 LSMS **
************************************************


* Plot tenure characteristics. The directory separator was missing from this
* path, which made Stata expand an undefined global named
* panel_datasect11b1_plantingw2 instead of $panel_data.
use  "$panel_data/sect11b1_plantingw2", clear

	* How the plot was acquired; code 9 is set to missing
	ren s11b1q4 plot_acquired
	replace plot_acquired=. if plot_acquired==9
	lab var plot_acquired "How the plot was acquired (rent, purchased, free use, gift)"
	
	* Irrigation indicator: code 2 and missing both become 0
	ren s11b1q39 plot_irrigated
	recode plot_irrigated (2=0) (.=0)
	lab var plot_irrigated "Indicator whether the plot is irrigated"

	* Title indicator: codes 2, 3 and missing become 0
	ren s11b1q7 plot_title
	recode plot_title (2=0) (3=0) (.=0) 
	lab var plot_title "HH Has legal title for plot"
	
	* 1 if any of s11b1q19, s11b1q20 or s11b1q21 equals 1, otherwise 0
	gen plot_collateral=1 if s11b1q19==1 | s11b1q20==1 | s11b1q21==1
	replace plot_collateral=0 if plot_collateral==.
	lab var plot_collateral "HH can sell or use plot as collateral"

	tempfile tenure
	save `tenure'

	
* Household-member file: builds age, identifies a single head per household and
* derives the head's sex and age plus household size (constant within hhid).
use "$panel_data/sect1_harvestw2", clear
	gen age_years=s1q4
	* Fall back on year of birth when age is missing or the two disagree by
	* more than 55 years (year-of-birth code 9999 is never used)
	replace age_years=2011-s1q6_year if s1q6_year!=9999 & (s1q4==. | (2011-s1q6_year-s1q4)<-55)
	recode age_years (999=.)
	* Top-code at 99. Missing values compare greater than any number in Stata,
	* so they must be excluded explicitly; otherwise every missing age
	* (including the 999 codes just recoded) would be turned into 99.
	replace age_years=99 if age_years>99 & !missing(age_years)
	// Uses age verification question where there was a response
	replace age_years = s1q17 if s1q17<=99

	//FIX FOR MULTIPLE HEADS
	gen head = (s1q3==1)
	egen heads=sum(head), by(hhid)
	* Keeps the oldest head in the household as the true head.
	gen headage=head*age_years if head==1 & heads>1
	egen headaget = max(headage), by(hhid)
	replace head=0 if head==1 & heads>1 & age_years!=headaget

	//HEAD GENDER
	* 1 when the head has s1q2==1; 0 when s1q2 is 2, 3 or missing
	g byte malheadt = 1 if (head == 1 & s1q2== 1)
	replace malheadt = 0 if (head == 1 & (s1q2== 2 | s1q2==. | s1q2==3))
	egen hh_sex = max(malheadt) , by(hhid)

	//HEAD AGE (IN YEARS)
	g ageheadt = age_years if head == 1
	egen hh_agey = max(ageheadt) , by(hhid)
	
	//HOUSEHOLD SIZE 
	* Number of member records per household
	g byte hhsizet = 1
	egen hhsize = sum(hhsizet) , by(hhid)

	tempfile hh
	save `hh'
	
	* Education file: derives years of schooling per person (edu_years, then the
	* age-capped edu) and household summaries of it.
	use "$panel_data/sect2_plantingw2", clear

	*** YEARS OF EDUCATION ***
	* school is a straight copy of s2q8
	gen school=s2q8
	recode s2q5 (.=1) if s2q8!=.
	* NOTE(review): the next recode only touches missing values but is restricted
	* to non-missing s2q5, so it changes nothing - confirm the intended condition.
	recode s2q5 (.=2) if s2q5!=.
	recode s2q5 (.=1) if s2q6!=0 & s2q6!=.

	** Years of schooling variable is based on completed level and highest qualification attained**
	* Every replace below is guarded by edu_years==., so the first matching rule wins.
	gen edu_years=.
	replace edu_years=0 if (s2q5==2  |s2q5==.)
	replace edu_years=0 if school<=2
	** Primary school**
	replace edu_years=1 if school==11 & edu_years==.
	replace edu_years=2 if school==12 & edu_years==.
	replace edu_years=3 if school==13 & edu_years==.
	replace edu_years=4 if school==14 & edu_years==.
	replace edu_years=5 if school==15 & edu_years==.
	replace edu_years=6 if school==16 & edu_years==.
	** Secondary school**
	replace edu_years=7 if school==21 & edu_years==.
	replace edu_years=8 if school==22 & edu_years==.
	replace edu_years=9 if school==23 & edu_years==.
	replace edu_years=10 if school==24 & edu_years==.
	replace edu_years=11 if school==25 & edu_years==.
	replace edu_years=12 if school==26 & edu_years==.
	**lower six,upper 6 teacher training vocational/technical modern school, NCE, poly/prof 1st degree, higher degree**
	** Quaranic integrated, Quaranic, adult**
	* NOTE(review): school equals s2q8, so a condition such as school==26 & s2q8==1
	* can never be true and the next six lines change nothing as written -
	* presumably a different variable was intended for one side; confirm.
	replace edu_years=5 if (school==26|school==27|school==28|school==31|school==32|school==51|school==52|school==34|school==43|school==61)&  s2q8==1& edu_years==.
	replace edu_years=6 if (school==26|school==27|school==28|school==31|school==32|school==51|school==52 |school==34|school==43|school==61)&  s2q8==2& edu_years==.
	replace edu_years=9 if (school==26|school==27|school==28|school==31|school==32|school==51|school==52 |school==34|school==43|school==61)& (s2q8==3 | s2q8==5)& edu_years==.
	replace edu_years=12 if (school==26|school==27|school==28|school==31|school==32|school==51|school==52 |school==34|school==43|school==61)& s2q8==6 & edu_years==.
	replace edu_years=13 if (school==26|school==27|school==28|school==31|school==32|school==51|school==52 |school==34|school==43|school==61)& s2q8==7 & edu_years==.
	replace edu_years=14 if (school==26|school==27|school==28|school==31|school==32|school==51|school==52 |school==34|school==43|school==61)&(s2q8==4 |s2q8==8|s2q8==9|s2q8==10)& edu_years==.
	* Defaults by schooling code for anything still unassigned
	replace edu_years=14 if school==34  & edu_years==. 
	replace edu_years=16 if school==43 & edu_years==. 
	replace edu_years=12 if school==61 & edu_years==.
	replace edu_years=12 if school==26 & edu_years==. 
	replace edu_years=13 if school==27 & edu_years==.
	replace edu_years=14 if school==28 & edu_years==.
	replace edu_years=14 if school==31 & edu_years==.
	replace edu_years=14 if school==32 & edu_years==. 
	replace edu_years=6 if school==51 & edu_years==.
	replace edu_years=12 if school==52 & edu_years==.    
	**University/post-graduate school**
	* Years depend on s2q9 where it is one of the listed codes, else a default
	replace edu_years=14 if school==33 &  s2q9==8 & edu_years==.
	replace edu_years=16 if school==33 &  s2q9==9 & edu_years==.
	replace edu_years=14 if school==33 & edu_years==.
	replace edu_years=15 if (school==41|school==42)&  s2q9==13 &edu_years==.
	replace edu_years=16 if (school==41|school==42)&  s2q9==9  & edu_years==.
	replace edu_years=18 if (school==41|school==42)&  s2q9==11 & edu_years==.
	replace edu_years=22 if (school==41|school==42)&  s2q9==12 & edu_years==.
	replace edu_years=16 if school==41& edu_years==.
	replace edu_years=16 if school==42& edu_years==.

	* Last resort: assign years from s2q9 alone
	replace edu_years=6 if s2q9==2 & edu_years==.
	replace edu_years=9 if s2q9==5 & edu_years==.
	replace edu_years=12 if s2q9==6 & edu_years==.
	replace edu_years=14 if s2q9==8 & edu_years==.
 
	tab edu_years,nolabel miss
	** Check education by age**
	* Caps years of education at a maximum for ages 6-24; otherwise edu equals
	* edu_years, and 0 when that is missing.
	* NOTE(review): age_years (and head, used below) are not created in this block
	* after the -use ..., clear- above; they are built in the household-member
	* block, so they presumably have to be merged in first - confirm.
	gen edu=.
	replace edu=1 if age_years==6 & edu_years>1 & edu==.
	replace edu=2 if age_years==7 & edu_years>2 & edu==.
	replace edu=3 if age_years==8 & edu_years>3 & edu==.
	replace edu=4 if age_years==9 & edu_years>4 & edu==.
	replace edu=5 if age_years==10 & edu_years>5 & edu==.
	replace edu=6 if age_years==11 & edu_years>6 & edu==.
	replace edu=7 if age_years==12 & edu_years>7 & edu==.
	replace edu=8 if age_years==13 & edu_years>8 & edu==.
	replace edu=9 if age_years==14 & edu_years>9 & edu==.
	replace edu=10 if age_years==15 & edu_years>10 & edu==.
	replace edu=11 if age_years==16 & edu_years>11 & edu==.
	replace edu=12 if age_years==17 & edu_years>12 & edu==.
	replace edu=13 if age_years==18 & edu_years>13 & edu==.
	replace edu=14 if age_years==19 & edu_years>14 & edu==.
	replace edu=15 if age_years==20 & edu_years>15 & edu==.
	replace edu=16 if age_years==21 & edu_years>16 & edu==.
	replace edu=18 if age_years==22 & edu_years>18 & edu==.
	replace edu=18 if age_years==23 & edu_years>18 & edu==.
	replace edu=18 if age_years==24 & edu_years>18 & edu==.
	replace edu= edu_years if edu==.
	replace edu= 0 if edu==.
	* HEAD
	* Head's years of education, spread to every member of the household
	g byte edut = edu if head==1
	egen hh_eduyrs = max(edut), by(hhid)
	drop edut
	* AVERAGE WITHIN HOUSEHOLD
	egen avg_eduyrs = mean(edu) if edu!=., by(hhid)
	bysort hhid: egen max_edu=max(edu)
	label var max_edu "highest education in HH"
	*** EDUCATION LEVEL ***
	* Attainment category (1-7, see label edulvl) from the schooling code.
	* Statements run in order, so a later replace overrides an earlier one.
	gen edu_lvl = 1 if school<3 | (s2q8==2 | s2q8==.)
	replace edu_lvl = 2 if school>=10 & school <=15
	replace edu_lvl = 3 if school==16 | school==27
	replace edu_lvl = 4 if school>=21 & school<=25
	replace edu_lvl = 5 if school==26 | school==28
	replace edu_lvl = 6 if school>=31 & school<=43
	* For teacher training, vocational, Koranic, technical, modern schools use highest level variable where available.
	* The list of schooling codes is evaluated once instead of being repeated on
	* each line.
	* NOTE(review): school is a copy of s2q8, so none of these codes can coincide
	* with s2q8 values 1-6 and the four replaces change nothing as written -
	* presumably another variable was intended; confirm.
	tempvar other_school
	gen byte `other_school' = inlist(school, 27, 28, 31, 32, 33, 34, 35, 51, 52, 61, 3, 4, 17)
	replace edu_lvl = 1 if `other_school' & s2q8==1
	replace edu_lvl = 3 if `other_school' & s2q8==2
	replace edu_lvl = 4 if `other_school' & (s2q8==3 | s2q8==5)
	replace edu_lvl = 5 if `other_school' & s2q8==6
	* Anything with a schooling code but no category so far
	replace edu_lvl = 7 if edu_lvl==. & s2q8!=.
	label define edulvl 1 "None/Preschool" 2 "Primary incomplete" 3 "Primary complete" 4 "Secondary incomplete" 5 "Secondary complete" 6 "Post secondary" 7 "Other"
	label values edu_lvl edulvl

	* HEAD
	* Head's category, spread to every member of the household
	gen edu_lvlt = edu_lvl if head==1
	egen hh_hghlevel = max(edu_lvlt), by(hhid)
	drop edu_lvlt
	label values hh_hghlevel edulvl
	* HEAD
	* NOTE(review): attend is not created anywhere in the visible code, and the
	* value label yn is attached here and below without being defined - confirm.
	gen attendt=attend if head==1
	egen hh_everattd = max(attendt), by(hhid)
	drop attendt
	lab values hh_everattd yn	

	*** LITERACY ***
	* s2q4 with codes 2, 6 and missing set to 0
	gen literate=s2q4
	recode literate (2=0) (6=0) (.=0)
	* HEAD
	gen literatet = literate if head==1
	egen hh_literacy = max(literatet), by(hhid)
	drop literatet
	lab values hh_literacy yn

	lab var hh_literacy "Household head is can read and write in a language"
	lab var hh_hghlevel "Highest level attained by household head"
	lab var hh_eduyrs "Years of education of head"

	* Keep the household ID and the three head-level variables. The space between
	* hh_literacy and hh_hghlevel was missing, which made -keep- stop with
	* "variable hh_literacyhh_hghlevel not found".
	keep hhid hh_literacy hh_hghlevel hh_eduyrs

	tempfile hh_ed
	save `hh_ed'
	
	* Fertilizer file: 0/1 use indicator, with zeros in four follow-up
	* questions turned into missing.
	use "$panel_data/sect11d_plantingw2.dta", clear

	***Fertilizer used yes/no?
	rename s11dq1 d_fert
	recode d_fert (2=0)

	***replace all 0 values to missing
	foreach q of varlist s11dq4 s11dq8 s11dq16 s11dq28 {
		replace `q'=. if `q'==0
	}

	tempfile fert
	save `fert'

	// cash crop dummy
	* One row per plot: 1 if any crop record on the plot carries one of the
	* listed crop codes, 0 otherwise.
	use  "$panel_data/secta3_harvestw2.dta", clear
	gen cashcrop_1 = inlist(cropcode, 3060, 3061, 3062, 2230, 1050, 1052, 1053, ///
		3180, 3183, 3184, 3040, 3041, 3042, 3230)
	label variable cashcrop_1 "coffee, sugar cane, cotton, palm oil, cocoa, rubber"
	collapse (max) cashcrop_1, by(hhid plotid)

	tempfile CashCrop
	save `CashCrop'


* Combine the validation plots with the wave 2 characteristics, keeping only
* records matched at every step.
* -clear- added so the step does not depend on the data in memory being saved.
use `validation', clear

* Plot level: tenure
merge 1:1 hhid plotid using `tenure'
	keep if _merge==3
	drop _merge

* Household level: head's sex and age, household size.
* `hh' is stored with one row per household member, so it cannot be merged
* 1:1 on hhid/plotid. The household-constant variables are reduced to one row
* per household and merged m:1 on hhid; -isid- stops the run if that fails.
preserve
	use hhid hh_sex hh_agey hhsize using `hh', clear
	duplicates drop
	isid hhid
	tempfile hh_level
	save `hh_level'
restore
merge m:1 hhid using `hh_level'
	keep if _merge==3
	drop _merge

* Household level: head's education. `hh_ed' holds hhid and head-level
* variables only (no plotid), so it is handled the same way.
preserve
	use `hh_ed', clear
	duplicates drop
	isid hhid
	tempfile hh_ed_level
	save `hh_ed_level'
restore
merge m:1 hhid using `hh_ed_level'
	keep if _merge==3
	drop _merge

* Plot level: fertilizer use
merge 1:1 hhid plotid using `fert'
	keep if _merge==3
	drop _merge

* Plot level: cash crop indicator
merge 1:1 hhid plotid using `CashCrop'
	keep if _merge==3
	drop _merge
	
*********************************************************
** Rename and construct variables to match other files **
*********************************************************

	* 0/1 copies of the plot indicators; any other value becomes missing
	gen fertilizer = d_fert if inlist(d_fert, 0, 1)
	label variable fertilizer "fertiilzer is used on plot"

	gen irrigated = plot_irrigated if inlist(plot_irrigated, 0, 1)
	label variable irrigated "plot is irrigated"

	* Rented = acquisition code 2; left missing when the code is missing
	gen rented = (plot_acquired==2) if plot_acquired!=.
	label variable rented "plot is rented"

	* Manager variables (not created in the visible code): an age of 1 is
	* treated as missing
	replace manager_age=. if manager_age==1 
	rename manager_eduyrs manager_yrsed

	rename plot_title      title
	rename plot_collateral collateral

	* Female head is the complement of hh_sex (1 = male head)
	gen head_female = 1 - hh_sex if inlist(hh_sex, 0, 1)

	rename hh_agey     head_age
	rename hh_eduyrs   head_yrsed
	rename hh_literacy head_literate

	gen urban = sector==1

	**Gen variable for number of plots cultivated per household
	* Counted from the planting plot file in a preserved session: plots with
	* s11b1q27==1, summed within household, one row per household.
	preserve
		use "$panel_data/sect11b1_plantingw2.dta", clear
		gen cultivated=1 if s11b1q27==1
		bysort hhid: egen num_cult_fields=total(cultivated)
		keep hhid num_cult_fields
		duplicates drop
		tempfile cult
		save `cult'
	restore

	* Households found only in the count file are not kept
	merge m:1 hhid using `cult', keep(master match) nogenerate
	* One household with no count is set to a single cultivated plot
	replace num_cult_fields=1 if num_cult_fields==. & hhid==290150

	order zone state lga urban ea ric hhid plotid ///
		SR_quant SR_unit SR_acres ///
		CR_acres cr_perimeter CRarea closingerr num_corners ///
		GPS_acres gps_perimeter GPSarea gps_accuracy ///
		title collateral fertilizer irrigated rented ///
		head_age head_yrsed head_literate

****************************
**Clean Up of Wave 2 Areas**
****************************

		* Wave 2 GPS area scaled by the same square-metre-to-acre factor used above
		* (the w2_* variables are not created in the visible code)
		gen w2_GPS_acres = w2_GPS*0.0002471053814672

		* hhid 290117, plot 1: quantity 500 read as 5; acres set to 5
		replace w2_SR_quant = 5 if hhid==290117 & plotid==1 & w2_SR_quant==500
		replace w2_SR_acres = 5 if hhid==290117 & plotid==1

		* hhid 290126, plot 2: quantity 400 read as 4; acres set to 4
		replace w2_SR_quant = 4 if hhid==290126 & plotid==2 & w2_SR_quant==400
		replace w2_SR_acres = 4 if hhid==290126 & plotid==2

		* hhid 300185, plot 4: GPS acres of 11.397 or more replaced by 1.139724
		replace w2_GPS_acres = 1.139724 if hhid==300185 & plotid==4 & w2_GPS_acres>=11.397


//Keep obs with objective measurements	
* i.e. discard plots lacking either the GPS or the compass-and-rope area
drop if GPS_acres==. | CR_acres==.

*********************************
**Gen LEVELS of CR and GPS area**
*********************************
	* The same two six-band classifications are built for the compass-and-rope
	* area (CR_acres) and the GPS area (GPS_acres):
	*	level_cr / level_gps             - metric cut-points, expressed in acres
	*	level_cr_acres / level_gps_acres - cut-points in round acres
	foreach m in cr gps {
		local area = upper("`m'") + "_acres"

		gen level_`m'=.
		replace level_`m'=1 if `area'<0.06177634536679  								// 250 sq. meters / 0.025 ha
		replace level_`m'=2 if `area'>=0.06177634536679 & `area'<0.1235526907336 	// 500 sq. meters / 0.05 ha
		replace level_`m'=3 if `area'>=0.1235526907336 & `area'<0.3706580722008 		// 1500 sq. meters / 0.15 ha
		replace level_`m'=4 if `area'>=0.3706580722008 & `area'<0.6177634536679 		// 2500 sq. meters / 0.25 ha
		replace level_`m'=5 if `area'>=0.6177634536679 & `area'<1.235526907336  		// 5000 sq. meters / 0.5 hectare
		replace level_`m'=6 if `area'>=1.235526907336  & `area'!=. 

		//levels in ACRES
		gen level_`m'_acres=.
		replace level_`m'_acres=1 if `area'<0.05
		replace level_`m'_acres=2 if `area'>=0.05 & `area'<0.15
		replace level_`m'_acres=3 if `area'>=0.15 & `area'<0.35
		replace level_`m'_acres=4 if `area'>=0.35 & `area'<0.75
		replace level_`m'_acres=5 if `area'>=0.75 & `area'<1.25
		replace level_`m'_acres=6 if `area'>=1.25  & `area'!=. 
	}

	* Value labels shared by the CR and GPS versions
	label define level_acres 1 "<0.05 acres" 2 "<0.15 acres" 3 "<0.35 acres" ///
		4 "<0.75 acres" 5 "<1.25 acres" 6 ">=1.25 acres"
	label define level 1 "<250 sq. meters / 0.025 ha" 2 "<500 sq. meters / 0.05 ha" 3 "<1500 sq. meters / 0.15 ha" ///
		4 "<2500 sq. meters / 0.25 ha" 5 "<5000 sq. meters / 0.5 hectare" 6 ">=5000 sq. meters / 0.5 hectare"
	foreach m in cr gps {
		label values level_`m'_acres level_acres
		label values level_`m' level
	}

**************************
** Gen "bias" variables **
**************************

	* Self-reported minus GPS area, in acres
	gen bias_gps = SR_acres-GPS_acres
	label var bias_gps "SR_acres-GPS_acres"		

	* GPS minus compass-and-rope area, in acres
	gen bias_cr = GPS_acres-CR_acres
	label var bias_cr "GPS_acres-CR_acres"		

	* Absolute and percentage versions relative to the CR area. Variable names
	* are written in full: the earlier "abs" and "CR_acre" only resolved through
	* variable-name abbreviation, which fails under -set varabbrev off- and
	* makes "abs" ambiguous as soon as a second abs_* variable exists.
	gen abs_bias_cr=abs(bias_cr)
	gen per_bias_cr=(bias_cr/CR_acres)*100
	gen abs_per_cr=(abs_bias_cr/CR_acres)*100	
		label var abs_bias_cr "absolute val. GPS - CR (acres)"
		label var per_bias_cr "relative bias (bias_cr/CR_acre * 100)"
		label var abs_per_cr "absolute val. relative bias, (|bias_cr|/CR_acre * 100)"

***************************************
**Impute missings for select variables*
***************************************
	* Only missing values are filled; observed values are never changed.
	tempvar fill

	//Number of corners missing for 2 obs 
	* filled with the modal value among plots in the same CR size band
	bysort level_cr: egen `fill' = mode(num_corners)
	replace num_corners = `fill' if num_corners==.
	drop `fill'

	//gps_accuracy missing for 1 obs
	* filled with the LGA mean
	bysort lga: egen `fill' = mean(gps_accuracy)
	replace gps_accuracy = `fill' if gps_accuracy==.
	drop `fill'

	//some HH & manager variables missing (impute at HH level first, <15 obs for each))
	* Step 1: household mode
	foreach v of varlist manager_age manager_sex fertilizer rented manager_head {
		bysort hhid: egen `fill' = mode(`v')
		replace `v' = `fill' if `v'==.
		drop `fill'
	}
	* Step 2: LGA mode for everything except manager age
	foreach v of varlist manager_sex fertilizer rented manager_head {
		bysort lga: egen `fill' = mode(`v')
		replace `v' = `fill' if `v'==.
		drop `fill'
	}
	* Step 2 for manager age: LGA mean
	bysort lga: egen `fill' = mean(manager_age)
	replace manager_age = `fill' if manager_age==.
	drop `fill'
*************************************
** Gen aggregated SR unit variable **
*************************************

	* Collapse the unit codes: 1 -> sq. meter, 6 -> acre, 2-5 -> non-standard
	codebook SR_unit
	gen SR_unit_ag = 3 if SR_unit==1
	replace SR_unit_ag = 1 if SR_unit==6
	replace SR_unit_ag = 4 if inrange(SR_unit, 2, 5)
	label define SR_UNIT_AG 1 "Acre" 2 "Hectare" 3 "Sq. Meter" 4 "Non-Standard Unit"
	label values SR_unit_ag SR_UNIT_AG

	* 1 when the aggregated unit is below 4, else 0
	gen standard_unit = SR_unit_ag<4
	label variable standard_unit "SR in standard unit"

	* Corner count in three bands
	gen level_corner = cond(num_corners<=4, 1, cond(num_corners<10, 2, 3)) if num_corners!=.
	label define CORNERS 1 "<= 4 sides" 2 "5 - 9 sides" 3 ">= 10 sides"
	label values level_corner CORNERS

	*gen w2-validation vars
		* Wave 2 area minus the validation-study area
		gen diff_SR  = w2_SR_acres-SR_acres
		gen diff_GPS = w2_GPS_acres-GPS_acres
		label variable diff_SR "w2_SR_acres-SR_acres"
		label variable diff_GPS "w2_GPS_acres-GPS_acres"

	**Terciles by CR area
		xtile tercile_cr=CR_acres, nq(3)

	* Weather in two groups: codes 1-2 versus codes 3-6
	gen weather2 = 1 if inlist(weather, 1, 2)
	replace weather2 = 2 if inlist(weather, 3, 4, 5, 6)
	label define WEATHER2 1 "Clear/Partly Cloudy" 2 "Mostly Cloudy/All Cloudy/Rainy"
	label values weather2 WEATHER2
	label variable weather2 "weather collapsed"

	* Square and cube of the CR area
	gen CR2 = CR_acres^2
	gen CR3 = CR_acres^3

***********************
**Merge Shape Metrics**
***********************	

	* Attach plot shape metrics; records present only in the shape-metrics file
	* are not kept. The merge indicator stays in the data.
	merge 1:1 hhid plotid using "$shapemetrics", keep(master match)
	
*******************************************
**TRIM TOP 1% OF ABSOLUTE VALUE OF % BIAS**
*******************************************	
	
	* Absolute and percentage self-report bias relative to the GPS area
	* (GPS_acres written in full rather than relying on the abbreviation GPS_acre)
	gen abs_bias_gps=abs(bias_gps)
	gen per_bias_gps=(bias_gps/GPS_acres)*100
	gen abs_per_gps=(abs_bias_gps/GPS_acres)*100
	
	* Flag observations below the 1st / above the 99th percentile of the absolute
	* percentage bias; only the top flags are used for dropping.
	sum abs_per_cr, d
	gen flagp1_cr=1 if abs_per_cr<r(p1)
	gen flagp99_cr=1 if abs_per_cr>r(p99) & abs_per_cr!=.

	sum abs_per_gps, d
	gen flagp1_gps=1 if abs_per_gps<r(p1)
	gen flagp99_gps=1 if abs_per_gps>r(p99) & abs_per_gps!=.

	drop if flagp99_cr==1 | flagp99_gps==1
	// DROPS 8 obs 

//1 outlier in closing error	
* Missing values compare greater than any number in Stata, so they are
* excluded explicitly; otherwise plots with no closing error recorded would be
* dropped together with the outlier.
drop if closingerr>5 & !missing(closingerr)

* Expected sample size noted by the authors; re-check if any closing errors are missing
count //485

*save "$nga_data/CheaperFaster_Nigeria.dta", replace


